SHAPEFILE and data
The shapefile data has been found here: http://gis-lab.info/qa/moscow-atd.html.
(This is the second ESRI shape file)
# Read layer "mo" from the "shp_mo" data source; text attributes are kept as
# character vectors rather than converted to factors.
mo_shp <- readOGR(
  dsn = "shp_mo",
  layer = "mo",
  stringsAsFactors = FALSE
)
OGR data source with driver: ESRI Shapefile
Source: "shp_mo", layer: "mo"
with 146 features
It has 7 fields
# Draw the shapefile geometry that was just loaded, as a quick visual check.
plot(mo_shp)

We also need the train dataset from Kaggle:
# Kaggle training data; string columns are kept as character.
train_df <- fread(
  "../input/train.csv",
  stringsAsFactors = FALSE
)
# The test set is left commented out:
# test_df <- fread("input/test.csv", stringsAsFactors = FALSE)

# Number of distinct sub_area values in the training data.
length(unique(train_df[["sub_area"]]))
[1] 146
The federal city of Moscow, Russia is divided into twelve administrative okrugs, which are in turn subdivided into districts (raions). (source Wikipedia)
There are 146 raions; they correspond to the 146 distinct sub_area values in the Kaggle data and to the NAME field of the shapefile.
The only issue is that the raion names don’t match. Those from Kaggle (sub_area) are in English while those on the shapefile (NAME) are in Russian.
For information: The other variable OKRUGS matches with ABBREV_AO. These are the 12 administrative okrugs.
I have created a dictionary which matches everything.
# Lookup table used to translate between the shapefile's NAME values and the
# Kaggle sub_area values.
NAME_to_subarea <- read.csv(
  file = "NAME_to_subarea.csv",
  stringsAsFactors = FALSE
)

# Show the first rows of the lookup table.
head(NAME_to_subarea)
Then we only need to merge the new columns.
# Attach the lookup-table columns to the shapefile's attribute table.
mo <- merge(x = mo_shp, y = NAME_to_subarea)

# Keep a copy of the merged attribute table and print it.
modata <- mo@data
modata
Plot the shapefile
# Categorical palette: OKRUG values are mapped onto a 12-colour
# topo.colors() ramp.
factpal <- colorFactor(palette = topo.colors(12), domain = mo@data$OKRUG)

# Hover labels: sub_area in bold, OKRUG on a second line. Each string is
# wrapped with htmltools::HTML so the markup is rendered rather than escaped.
labels <- lapply(
  sprintf(
    "<strong>%s</strong><br/>AO: %s",
    mo@data$sub_area, mo@data$OKRUG
  ),
  htmltools::HTML
)

# Build the map one layer at a time: base tiles, polygons filled by okrug,
# then the legend.
OKRUGS <- leaflet(mo)
OKRUGS <- addTiles(OKRUGS)
OKRUGS <- addPolygons(
  OKRUGS,
  color = "#444444",
  weight = 1,
  smoothFactor = 0.5,
  opacity = 1.0,
  fillOpacity = 0.5,
  fillColor = ~factpal(OKRUG),
  # Outline the polygon under the cursor in white and raise it to the front.
  highlightOptions = highlightOptions(
    color = "white",
    weight = 2,
    bringToFront = TRUE
  ),
  label = labels,
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto"
  )
)
OKRUGS <- addLegend(OKRUGS, pal = factpal, values = ~OKRUG, opacity = 1)

# Print the widget.
OKRUGS
Plot some useful data
We now want to plot the transaction prices.
# Per-sub_area summary of the training data, restricted to rows with a
# positive full_sq so the price-per-square-metre ratio is well defined:
#   nb             - number of rows
#   mean_price_doc - mean of price_doc
#   mean_price_sq  - mean of price_doc / full_sq
data <- train_df %>%
  filter(full_sq > 0) %>%
  group_by(sub_area) %>%
  summarise(
    nb = n(),
    mean_price_doc = mean(price_doc),
    mean_price_sq = mean(price_doc / full_sq)
  )

# Attach the summary columns to the spatial object's attribute table.
mo <- merge(x = mo, y = data)
# Continuous "YlOrRd" palette scaled to the mean sale price.
pal1 <- colorNumeric(
  palette = "YlOrRd",
  domain = mo@data$mean_price_doc)

# Continuous "YlOrRd" palette scaled to the mean price per square metre.
pal2 <- colorNumeric(
  palette = "YlOrRd",
  domain = mo@data$mean_price_sq)

# Hover labels shared by both price maps.
# The numeric fields are pre-formatted with formatC() rather than "%g":
# "%g" keeps six significant digits and switches to scientific notation from
# 1e6 upwards, so a mean price such as 7123456 was shown as "7.12346e+06".
# formatC() prints whole numbers with a thousands separator instead.
labels <- sprintf(
  "<strong>%s</strong><br/>AO: %s. <br/>Mean sale price: %s. <br/>Square metre cost: %s. <br/>Number of transactions: %s",
  mo@data$sub_area,
  mo@data$OKRUG,
  formatC(mo@data$mean_price_doc, format = "f", digits = 0, big.mark = ","),
  formatC(mo@data$mean_price_sq, format = "f", digits = 0, big.mark = ","),
  formatC(mo@data$nb, format = "d", big.mark = ",")
) %>% lapply(htmltools::HTML)
Mean sale price
# Choropleth of the mean sale price (mean_price_doc), coloured with pal1.
# Built one layer at a time: base tiles, polygons, legend.
map1 <- leaflet(mo)
map1 <- addTiles(map1)
map1 <- addPolygons(
  map1,
  color = "#444444",
  weight = 1,
  smoothFactor = 0.5,
  opacity = 1.0,
  fillOpacity = 0.5,
  fillColor = ~pal1(mean_price_doc),
  # Outline the polygon under the cursor in white and raise it to the front.
  highlightOptions = highlightOptions(
    color = "white",
    weight = 2,
    bringToFront = TRUE
  ),
  label = labels,
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto"
  )
)
map1 <- addLegend(
  map1,
  pal = pal1,
  values = ~mean_price_doc,
  opacity = 1,
  title = "Average selling prices"
)

# Print the widget.
map1
Square metre cost
# Choropleth of the mean price per square metre (mean_price_sq), coloured
# with pal2.
map2 <- leaflet(mo) %>%
  addTiles() %>%
  addPolygons(color = "#444444", weight = 1, smoothFactor = 0.5,
              opacity = 1.0, fillOpacity = 0.5,
              fillColor = ~pal2(mean_price_sq),
              # Outline the polygon under the cursor and raise it to the front.
              highlightOptions = highlightOptions(color = "white", weight = 2,
                                                  bringToFront = TRUE),
              label = labels,
              labelOptions = labelOptions(
                style = list("font-weight" = "normal", padding = "3px 8px"),
                textsize = "15px",
                direction = "auto")) %>%
  # The legend describes mean_price_sq, so its title names the per-square-metre
  # measure rather than repeating the title used for the sale-price map.
  addLegend(pal = pal2, values = ~mean_price_sq, opacity = 1,
            title = "Average price per square metre")

# Print the widget.
map2
LS0tCnRpdGxlOiAiR2VvbWF0aWNzIGZvciBLYWdnbGUgU2JlcmJhbmsgUnVzc2lhbiBIb3VzaW5nIE1hcmtldCBjb21wZXRpdGlvbnMiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgZmlnX2NhcHRpb246IHllcwogICAgZmlnX2hlaWdodDogNQogICAgZmlnX3dpZHRoOiA4CiAgICB0aGVtZTogeWV0aQogICAgdG9jOiB5ZXMKICBodG1sX2RvY3VtZW50OgogICAgZmlnX2hlaWdodDogNQogICAgZmlnX3dpZHRoOiA4Ci0tLQoKIyBUaGUgY29tcGV0aXRpb24KClRoZSBjb21wZXRpdGlvbiA6IFtTYmVyYmFuayBSdXNzaWFuIEhvdXNpbmcgTWFya2V0XShodHRwczovL3d3dy5rYWdnbGUuY29tL2Mvc2JlcmJhbmstcnVzc2lhbi1ob3VzaW5nLW1hcmtldCkgIApDYW4geW91IHByZWRpY3QgcmVhbHR5IHByaWNlIGZsdWN0dWF0aW9ucyBpbiBSdXNzaWHigJlzIHZvbGF0aWxlIGVjb25vbXk/ICAKCgpgYGB7ciwgaW5jbHVkZT1GQUxTRX0KIyBQQUNLQUdFIExPQURJTkcKbGlicmFyeShkcGx5cikKbGlicmFyeShzcCkKbGlicmFyeShyZ2RhbCkKbGlicmFyeShsZWFmbGV0KQpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkoRFQpCgoKYWRkVGlsZXMgPSBmdW5jdGlvbiAobWFwLCB1cmxUZW1wbGF0ZSA9ICJodHRwOi8ve3N9LnRpbGUub3BlbnN0cmVldG1hcC5vcmcve3p9L3t4fS97eX0ucG5nIiwKYXR0cmlidXRpb24gPSBOVUxMLCBsYXllcklkID0gTlVMTCwgZ3JvdXAgPSBOVUxMLCBvcHRpb25zID0gdGlsZU9wdGlvbnMoKSkKewpvcHRpb25zJGF0dHJpYnV0aW9uID0gYXR0cmlidXRpb24KaWYgKG1pc3NpbmcodXJsVGVtcGxhdGUpICYmIGlzLm51bGwob3B0aW9ucyRhdHRyaWJ1dGlvbikpCm9wdGlvbnMkYXR0cmlidXRpb24gPSBwYXN0ZSgiwqkgT3BlblN0cmVldE1hcCIsCiJjb250cmlidXRvcnMsIENDLUJZLVNBIikKaW52b2tlTWV0aG9kKG1hcCwgZ2V0TWFwRGF0YShtYXApLCAiYWRkVGlsZXMiLCB1cmxUZW1wbGF0ZSwKbGF5ZXJJZCwgZ3JvdXAsIG9wdGlvbnMpCn0KYGBgCgojIFNIQVBFRklMRSBhbmQgZGF0YQoKVGhlIHNoYXBlZmlsZSBkYXRhIGhhcyBiZWVuIGZvdW5kIGhlcmU6IFtodHRwOi8vZ2lzLWxhYi5pbmZvL3FhL21vc2Nvdy1hdGQuaHRtbF0oaHR0cDovL2dpcy1sYWIuaW5mby9xYS9tb3Njb3ctYXRkLmh0bWwpLiAgCihUaGlzIGlzIHRoZSBzZWNvbmQgKipFU1JJIHNoYXBlKiogZmlsZSkKCmBgYHtyfQptb19zaHAgPC0gcmVhZE9HUihkc24gPSAic2hwX21vIiwgbGF5ZXIgPSAibW8iLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCnBsb3QobW9fc2hwKQpgYGAKCldlIGFsc28gbmVlZCB0aGUgdHJhaW4gZGF0YXNldCBmcm9tIEthZ2dsZToKYGBge3J9CiMgS0FHR0xFIERBVEEgRklMRQp0cmFpbl9kZiA8LSBmcmVhZCgiLi4vaW5wdXQvdHJhaW4uY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQojIHRlc3RfZGYgPC0gZnJlYWQoImlucHV0L3Rlc3QuY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNF
KQpgYGAKCmBgYHtyfQpsZW5ndGgodW5pcXVlKHRyYWluX2RmJHN1Yl9hcmVhKSkKYGBgCgpUaGUgZmVkZXJhbCBjaXR5IG9mIE1vc2NvdywgUnVzc2lhIGlzIGRpdmlkZWQgaW50byB0d2VsdmUgYWRtaW5pc3RyYXRpdmUgb2tydWdzLCB3aGljaCBhcmUgaW4gdHVybiBzdWJkaXZpZGVkIGludG8gZGlzdHJpY3RzIChyYWlvbnMpLiBbKHNvdXJjZSBXaWtpcGVkaWEpXShodHRwczovL2VuLndpa2lwZWRpYS5vcmcvd2lraS9BZG1pbmlzdHJhdGl2ZV9kaXZpc2lvbnNfb2ZfTW9zY293KSAgClRoZXJlIGFyZSAxNDYgcmFpb25zLCB3aGljaCBhcmUgdGhlICoqMTQ2IGRpZmZlcmVudCBzdWJfYXJlYSoqLCB3aGljaCBhcmUgdGhlICoqTkFNRSoqIG9mIHRoZSBzaGFwZWZpbGUuCgpUaGUgb25seSBpc3N1ZSBpcyB0aGF0IHRoZSByYWlvbiBuYW1lcyBkb24ndCBtYXRjaC4gVGhvc2UgZnJvbSBLYWdnbGUgKCoqc3ViX2FyZWEqKikgYXJlIGluIEVuZ2xpc2ggd2hpbGUgdGhvc2Ugb24gdGhlIHNoYXBlZmlsZSAoKipOQU1FKiopIGFyZSBpbiBSdXNzaWFuLiAgCkZvciBpbmZvcm1hdGlvbjogVGhlIG90aGVyIHZhcmlhYmxlICoqT0tSVUdTKiogbWF0Y2hlcyB3aXRoICoqQUJCUkVWX0FPKiouIFRoZXJlIGFyZSB0aGUgMTIgYWRtaW5pc3RyYXRpdmUgb2tydWdzLgoKCkkgaGF2ZSBjcmVhdGVkIGEgZGljdGlvbm5hcnkgd2hpY2ggbWF0Y2hlcyBldmVyeXRoaW5nLiAKCmBgYHtyLCBlY2hvPVRSVUV9Ck5BTUVfdG9fc3ViYXJlYSA8LSByZWFkLmNzdigiTkFNRV90b19zdWJhcmVhLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKaGVhZChOQU1FX3RvX3N1YmFyZWEpCmBgYAoKVGhlbiB3ZSBvbmx5IG5lZWQgdG8gbWVyZ2UgdGhlIG5ldyBjb2x1bW5zLgoKYGBge3J9Cm1vIDwtIG1lcmdlKG1vX3NocCwgTkFNRV90b19zdWJhcmVhKQptb2RhdGEgPC0gbW9AZGF0YTttb2RhdGEKYGBgCgojIFBsb3QgdGhlIHNoYXBlZmlsZQpgYGB7cn0KZmFjdHBhbCA8LSBjb2xvckZhY3Rvcih0b3BvLmNvbG9ycygxMiksIG1vQGRhdGEkT0tSVUcpCgpsYWJlbHMgPC0gc3ByaW50ZigKICAiPHN0cm9uZz4lczwvc3Ryb25nPjxici8+QU86ICVzIiwKICBtb0BkYXRhJHN1Yl9hcmVhLCBtb0BkYXRhJE9LUlVHCikgJT4lIGxhcHBseShodG1sdG9vbHM6OkhUTUwpCgoKT0tSVUdTIDwtIGxlYWZsZXQobW8pICU+JSAKICBhZGRUaWxlcygpICU+JQogIGFkZFBvbHlnb25zKGNvbG9yID0gIiM0NDQ0NDQiLCB3ZWlnaHQgPSAxLCBzbW9vdGhGYWN0b3IgPSAwLjUsCiAgICAgICAgICAgICAgb3BhY2l0eSA9IDEuMCwgZmlsbE9wYWNpdHkgPSAwLjUsCiAgICAgICAgICAgICAgZmlsbENvbG9yID0gfmZhY3RwYWwoT0tSVUcpLAogICAgICAgICAgICAgIGhpZ2hsaWdodE9wdGlvbnMgPSBoaWdobGlnaHRPcHRpb25zKGNvbG9yID0gIndoaXRlIiwgd2VpZ2h0ID0gMiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBicmluZ1RvRnJv
bnQgPSBUUlVFKSwKICAgICAgICAgICAgICBsYWJlbCA9IGxhYmVscywKICAgICAgICAgICAgICBsYWJlbE9wdGlvbnMgPSBsYWJlbE9wdGlvbnMoCiAgICAgICAgICAgICAgICBzdHlsZSA9IGxpc3QoImZvbnQtd2VpZ2h0IiA9ICJub3JtYWwiLCBwYWRkaW5nID0gIjNweCA4cHgiKSwKICAgICAgICAgICAgICAgIHRleHRzaXplID0gIjE1cHgiLAogICAgICAgICAgICAgICAgZGlyZWN0aW9uID0gImF1dG8iKSkgJT4lCiAgYWRkTGVnZW5kKHBhbCA9IGZhY3RwYWwsIHZhbHVlcyA9IH5PS1JVRywgb3BhY2l0eSA9IDEpCk9LUlVHUwpgYGAKCiMgUGxvdCBzb21lIHVzZWZ1bCBkYXRhIHsudGFic2V0fQoKV2Ugd2FudCBub3cgdG8gcGxvdCB0aGUgcHJpY2Ugb2YgdGhlIHRyYW5zYWN0aW9ucy4KCmBgYHtyfQpkYXRhIDwtIHRyYWluX2RmICU+JQogIGZpbHRlcihmdWxsX3NxID4gMCkgJT4lCiAgbXV0YXRlKHByaWNlX3NxID0gcHJpY2VfZG9jL2Z1bGxfc3EpICU+JQogIGdyb3VwX2J5KHN1Yl9hcmVhKSAlPiUKICBzdW1tYXJpc2UobmIgPSBuKCksCiAgICAgICAgICAgIG1lYW5fcHJpY2VfZG9jID0gbWVhbihwcmljZV9kb2MpLAogICAgICAgICAgICBtZWFuX3ByaWNlX3NxID0gbWVhbihwcmljZV9zcSkpCgptbyA8LSBtZXJnZShtbywgZGF0YSkKYGBgCgpgYGB7cn0KcGFsMSA8LSBjb2xvck51bWVyaWMoCiAgcGFsZXR0ZSA9ICJZbE9yUmQiLAogIGRvbWFpbiA9IG1vQGRhdGEkbWVhbl9wcmljZV9kb2MpCgpwYWwyIDwtIGNvbG9yTnVtZXJpYygKICBwYWxldHRlID0gIllsT3JSZCIsCiAgZG9tYWluID0gbW9AZGF0YSRtZWFuX3ByaWNlX3NxKQoKbGFiZWxzIDwtIHNwcmludGYoCiAgIjxzdHJvbmc+JXM8L3N0cm9uZz48YnIvPkFPOiAlcy4gPGJyLz5NZWFuIHNhbGUgcHJpY2U6ICVnLiA8YnIvPlNxdWFyZSBtZXRyZSBjb3N0OiAlZy4gPGJyLz5OdW1iZXIgb2YgdHJhbnNhY3Rpb25zOiAlZyIsCiAgbW9AZGF0YSRzdWJfYXJlYSwgbW9AZGF0YSRPS1JVRywgbW9AZGF0YSRtZWFuX3ByaWNlX2RvYywgbW9AZGF0YSRtZWFuX3ByaWNlX3NxLCBtb0BkYXRhJG5iCikgJT4lIGxhcHBseShodG1sdG9vbHM6OkhUTUwpCgpgYGAKCiMjIFNxdWFyZSBtZXRlciBjb3N0CgpgYGB7cn0KbWFwMSA8LSBsZWFmbGV0KG1vKSAlPiUgCiAgYWRkVGlsZXMoKSAlPiUKICBhZGRQb2x5Z29ucyhjb2xvciA9ICIjNDQ0NDQ0Iiwgd2VpZ2h0ID0gMSwgc21vb3RoRmFjdG9yID0gMC41LAogICAgICAgICAgICAgIG9wYWNpdHkgPSAxLjAsIGZpbGxPcGFjaXR5ID0gMC41LAogICAgICAgICAgICAgIGZpbGxDb2xvciA9IH5wYWwxKG1lYW5fcHJpY2VfZG9jKSwKICAgICAgICAgICAgICBoaWdobGlnaHRPcHRpb25zID0gaGlnaGxpZ2h0T3B0aW9ucyhjb2xvciA9ICJ3aGl0ZSIsIHdlaWdodCA9IDIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYnJpbmdUb0Zyb250ID0gVFJVRSksCiAgICAgICAgICAgICAgbGFiZWwgPSBs
YWJlbHMsCiAgICAgICAgICAgICAgbGFiZWxPcHRpb25zID0gbGFiZWxPcHRpb25zKAogICAgICAgICAgICAgICAgc3R5bGUgPSBsaXN0KCJmb250LXdlaWdodCIgPSAibm9ybWFsIiwgcGFkZGluZyA9ICIzcHggOHB4IiksCiAgICAgICAgICAgICAgICB0ZXh0c2l6ZSA9ICIxNXB4IiwKICAgICAgICAgICAgICAgIGRpcmVjdGlvbiA9ICJhdXRvIikpICU+JQogIGFkZExlZ2VuZChwYWwgPSBwYWwxLCB2YWx1ZXMgPSB+bWVhbl9wcmljZV9kb2MsIG9wYWNpdHkgPSAxLCB0aXRsZSA9ICJBdmVyYWdlIHNlbGxpbmcgcHJpY2VzIikKbWFwMQpgYGAKCiMjIE1lYW4gcHJpY2Ugc2FsZSAKCmBgYHtyfQptYXAyIDwtIGxlYWZsZXQobW8pICU+JSAKICBhZGRUaWxlcygpICU+JQogIGFkZFBvbHlnb25zKGNvbG9yID0gIiM0NDQ0NDQiLCB3ZWlnaHQgPSAxLCBzbW9vdGhGYWN0b3IgPSAwLjUsCiAgICAgICAgICAgICAgb3BhY2l0eSA9IDEuMCwgZmlsbE9wYWNpdHkgPSAwLjUsCiAgICAgICAgICAgICAgZmlsbENvbG9yID0gfnBhbDIobWVhbl9wcmljZV9zcSksCiAgICAgICAgICAgICAgaGlnaGxpZ2h0T3B0aW9ucyA9IGhpZ2hsaWdodE9wdGlvbnMoY29sb3IgPSAid2hpdGUiLCB3ZWlnaHQgPSAyLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJyaW5nVG9Gcm9udCA9IFRSVUUpLAogICAgICAgICAgICAgIGxhYmVsID0gbGFiZWxzLAogICAgICAgICAgICAgIGxhYmVsT3B0aW9ucyA9IGxhYmVsT3B0aW9ucygKICAgICAgICAgICAgICAgIHN0eWxlID0gbGlzdCgiZm9udC13ZWlnaHQiID0gIm5vcm1hbCIsIHBhZGRpbmcgPSAiM3B4IDhweCIpLAogICAgICAgICAgICAgICAgdGV4dHNpemUgPSAiMTVweCIsCiAgICAgICAgICAgICAgICBkaXJlY3Rpb24gPSAiYXV0byIpKSAlPiUKICBhZGRMZWdlbmQocGFsID0gcGFsMiwgdmFsdWVzID0gfm1lYW5fcHJpY2Vfc3EsIG9wYWNpdHkgPSAxLCB0aXRsZSA9ICJBdmVyYWdlIHNlbGxpbmcgcHJpY2VzIikKbWFwMgpgYGAKCgo=